Read prepared data.

# Load the prepared subscriptions dataset from its RDS file (path is relative
# to the notebook's working directory).
subscriptions <- read_rds('../data/subscriptions.rds')
# Print per-column summary statistics as a quick sanity check of the data.
summary(subscriptions)
   customerid       subscriptionid       periodend           revenuecurr        revenuecurrinclvat
 Min.   :   10006   Min.   :     154   Min.   :2004-03-15   Min.   :      0.8   Min.   :      1   
 1st Qu.: 1109200   1st Qu.: 5195644   1st Qu.:2013-04-10   1st Qu.:     15.0   1st Qu.:     15   
 Median : 5404300   Median :15011583   Median :2015-04-03   Median :     63.2   Median :     79   
 Mean   : 8160520   Mean   :14758473   Mean   :2014-09-14   Mean   :   1371.7   Mean   :   1393   
 3rd Qu.:14665703   3rd Qu.:24306942   3rd Qu.:2016-11-19   3rd Qu.:    174.4   3rd Qu.:    218   
 Max.   :24113207   Max.   :29648411   Max.   :2020-03-27   Max.   :2823000.0   Max.   :2823000   
                                                                                                  
 billingcurrency    startmonth            endmonth          isthreetoonesubs      months      
 DKK    :669182   Min.   :2003-12-01   Min.   :2004-03-01   Min.   :0.00000   Min.   : 1.000  
 EUR    :538177   1st Qu.:2013-01-01   1st Qu.:2013-04-01   1st Qu.:0.00000   1st Qu.: 1.000  
 USD    :513522   Median :2014-12-01   Median :2015-04-01   Median :0.00000   Median : 3.000  
 NOK    :232949   Mean   :2014-05-16   Mean   :2014-08-31   Mean   :0.03175   Mean   : 3.527  
 SEK    :159662   3rd Qu.:2016-08-01   3rd Qu.:2016-11-01   3rd Qu.:0.00000   3rd Qu.: 3.000  
 GBP    : 98063   Max.   :2018-03-01   Max.   :2020-03-01   Max.   :1.00000   Max.   :24.000  
 (Other): 89456                                                                               
    status        num_previous_months num_previous_subs num_previous_months_binned firstpaiddate       
 active:1859624   Min.   :  0.00      Min.   : 0.00     Min.   : 0.00              Min.   :2003-12-15  
 churn : 441387   1st Qu.:  1.00      1st Qu.: 1.00     1st Qu.: 1.00              1st Qu.:2011-01-01  
                  Median :  8.00      Median : 3.00     Median : 8.00              Median :2013-07-01  
                  Mean   : 15.33      Mean   : 6.02     Mean   :14.69              Mean   :2013-01-30  
                  3rd Qu.: 22.00      3rd Qu.: 8.00     3rd Qu.:26.00              3rd Qu.:2015-09-10  
                  Max.   :162.00      Max.   :69.00     Max.   :39.00              Max.   :2018-03-28  
                                                                                                       
 channelcat      paymentperiodchosenatstart    currency        marketname       siteverkey    
 paid :1533098   Min.   :-1.000             DKK    :669320   DK     :669659   US     :916164  
 viral: 767913   1st Qu.: 3.000             USD    :551762   NO     :233280   DK     :637648  
                 Median : 3.000             EUR    :542911   US     :192217   NO     :207389  
                 Mean   : 4.242             NOK    :232831   SE     :160491   SE     :144376  
                 3rd Qu.: 3.000             SEK    :159469   FR     :147157   FR     : 96237  
                 Max.   :24.000             GBP    : 95683   (Other):897812   NL     : 80165  
                                            (Other): 49035   NA's   :   395   (Other):219032  
 firstpaidmonth        firstdevice          segment        isquickpurchase  productversion   
 Min.   :2003-12-01   desktop: 186075   business: 417088   Min.   :0.0000   v_3    : 904354  
 1st Qu.:2011-01-01   mobile :  75406   other   :  99772   1st Qu.:0.0000   v_4    : 152485  
 Median :2013-07-01   NA's   :2039530   personal: 300111   Median :1.0000   v_older:1243898  
 Mean   :2013-01-16                     NA's    :1484040   Mean   :0.5291   NA's   :    274  
 3rd Qu.:2015-09-01                                        3rd Qu.:1.0000                    
 Max.   :2018-03-01                                        Max.   :1.0000                    
                                                           NA's   :274                       
   isfreemium                                model31224      threetoonestartdate    market_category  
 Min.   :0.0000   pre-changes                     :1904931   Min.   :2017-02-14   DK        :669659  
 1st Qu.:0.0000   3-12-24m-2016-08-25             : 150403   1st Qu.:2017-03-14   LowGeneric:269691  
 Median :0.0000   3-12m-v1-2015-11-30             : 146501   Median :2017-04-18   NO        :233280  
 Mean   :0.2602   12m-v2-2015-05-16               :  72411   Mean   :2017-05-25   US        :192217  
 3rd Qu.:1.0000   ex-subscriptionplanpaywallexpand:   8512   3rd Qu.:2017-09-06   SE        :160491  
 Max.   :1.0000   ex-uglyjerry12mswitch           :   5429   Max.   :2017-12-19   FR        :147157  
 NA's   :274      (Other)                         :  12824   NA's   :2144362      (Other)   :628516  
 siteverkey_cat siteverkey_cat2 chosen_subs_length isthreetoonestate  gdppercapita     
 ORG:1384847    MUT:1317341     1  :  56972        Min.   :0.00000   Min.   :   218.3  
 SS : 916164    ORG: 401813     -1 :  11274        1st Qu.:0.00000   1st Qu.: 42013.3  
                SS : 581857     12 :  52199        Median :0.00000   Median : 55670.9  
                                24 :   7112        Mean   :0.04074   Mean   : 51846.7  
                                3  : 210111        3rd Qu.:0.00000   3rd Qu.: 60637.3  
                                gen:1963343        Max.   :1.00000   Max.   :108422.5  
                                                                                       
 gdppercapita_scaled                           subscription_summary
 Min.   :-2.4363     mc-DK_ssc-MUT_ac-39_m-3_ccsl-gen    : 109730  
 1st Qu.:-0.4640     mc-DK_ssc-MUT_ac-26_m-3_ccsl-gen    :  77692  
 Median : 0.1805     mc-LowGeneric_ssc-SS_ac-0_m-1_ccsl-3:  62637  
 Mean   : 0.0000     mc-DK_ssc-ORG_ac-0_m-3_ccsl-gen     :  58395  
 3rd Qu.: 0.4148     mc-DK_ssc-MUT_ac-38_m-3_ccsl-gen    :  52038  
 Max.   : 2.6698     mc-DK_ssc-ORG_ac-3_m-3_ccsl-gen     :  41923  
                     (Other)                             :1898596  
            subscription_summary_no_market
 ssc-MUT_ac-26_m-3_ccsl-gen: 204356       
 ssc-MUT_ac-39_m-3_ccsl-gen: 171697       
 ssc-SS_ac-0_m-1_ccsl-3    : 167341       
 ssc-ORG_ac-0_m-3_ccsl-gen : 132399       
 ssc-MUT_ac-8_m-3_ccsl-gen : 120692       
 ssc-MUT_ac-38_m-3_ccsl-gen: 111606       
 (Other)                   :1392920       
# Build the modelling dataset: keep subscriptions whose end month lies in
# [begin_train_window, end_window), then derive a factor copy of the binned
# number of previous months, the training/validation flag and the 0/1 churn
# target. The window boundaries are defined outside this section.
subscriptions_with_target <- subscriptions %>%
  filter(endmonth >= begin_train_window, endmonth < end_window) %>%
  mutate(
    num_previous_months_binned_fct = as.factor(num_previous_months_binned),
    # rows ending on or after begin_validation_window form the validation set
    set_type = as.factor(
      if_else(endmonth >= begin_validation_window, 'validation', 'training')
    ),
    # numeric target: 1 when status is 'churn', 0 otherwise
    churnind = ifelse(status == 'churn', 1, 0)
  )

Prepare the aggregated churn table (`churntable`) whose churn rates we want to predict.

# Aggregate subscriptions into cells (one row per combination of the grouping
# columns below) and compute the observed churn figures the model is fit on.
churntable <- subscriptions_with_target %>%
  group_by(set_type, siteverkey_cat2, market_category, months,
           num_previous_months_binned, chosen_subs_length,
           subscription_summary_no_market) %>%
  summarise(num_obs = n(),
            churned = sum(churnind)) %>%
  # regroup by set so that sum(num_obs) below, and hence the weights, are
  # computed separately for the training and the validation rows
  group_by(set_type) %>%
  mutate(churn_rate = churned / num_obs,
         renew_rate = 1 - churn_rate,
         # months-th root of the renewal rate, turned back into a churn rate
         # expressed per month
         month_churn = 1 - renew_rate^(1 / as.double(months)),
         log_month_churn = log(month_churn),
         weight = num_obs / sum(num_obs)) %>%
  # drop the grouping so later steps do not silently operate per set_type
  ungroup()

# Cells without any churn have month_churn == 0 and therefore
# log_month_churn == -Inf, which cannot serve as a regression target.
# NB! Open question: dropping them may bias the fit, since the removed cells
# are exactly those with the lowest observed churn.
churntable_no_zeros <- churntable %>%
  filter(churn_rate > 0)

Train model

# Fit the churn model on the training cells only. No family is supplied, so
# glm() uses its default (gaussian): a weighted linear regression of the log
# monthly churn on market category and subscription summary, with each cell
# weighted by its `weight` column.
new_model <- glm(
  log_month_churn ~ market_category + subscription_summary_no_market,
  data = churntable_no_zeros[churntable_no_zeros$set_type == 'training', ],
  weights = weight
)

Model validation for training (2017-01-01 - 2017-08-01) and validation (2017-09-01 - 2018-01-01) sets:

# Build the prediction table for the fitted churn model. validation() and
# predict_2fct_model are defined outside this section (presumably in the
# sourced utility scripts -- confirm); predict_2fct_model looks like the
# prediction function to use with new_model.
prediction_table <- validation(subscriptions_with_target, new_model, predict_2fct_model)

# Draw the validation plots for that table.
# NOTE(review): minimal_share = 0.01 presumably hides groups below a 1% share
# -- confirm against validation_plots().
validation_plots(prediction_table, minimal_share = 0.01)
NAs introduced by coercion

Try a simple logistic model

# Baseline for comparison: logistic regression fit directly on the individual
# training subscriptions, with the 0/1 churn indicator as the response.
model_logit <- glm(
  formula = churnind ~ market_category + siteverkey_cat2 +
    num_previous_months_binned + months + chosen_subs_length,
  data = subscriptions_with_target[
    subscriptions_with_target$set_type == 'training',
  ],
  family = 'binomial'
)

Model validation for training (2017-01-01 - 2017-08-01) and validation (2017-09-01 - 2018-01-01) sets:

# Build the prediction table for the logistic model. No prediction function is
# passed here, so validation() falls back to whatever its default is
# (defined outside this section -- confirm).
prediction_table_logit <- validation(subscriptions_with_target, model_logit)

# Draw the same validation plots for the logistic model.
# NOTE(review): minimal_share = 0.01 presumably hides groups below a 1% share
# -- confirm against validation_plots().
validation_plots(prediction_table_logit, minimal_share = 0.01)
NAs introduced by coercion

LS0tCnRpdGxlOiAiQ2h1cm46IFR3byBkaW1lbnNpb25zIGFuZCBwcmVkaWN0IGxvZyBvZiBtb250aGx5IGNodXJuIHByb2JhYmlsaXR5IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Kc291cmNlKCdjb25maWcuUicpCnNvdXJjZSgndXRpbHMuUicpCnNvdXJjZSgndXRpbHNfdmFsaWRhdGlvbi5SJykKCiMgc291cmNlKCduZXdfbGlmZXRpbWVwcmVkaWN0b3IuUicpCmBgYAoKUmVhZCBwcmVwYXJlZCBkYXRhLgoKYGBge3IgcmVhZGluZywgdGlkeT1GfQpzdWJzY3JpcHRpb25zIDwtIHJlYWRfcmRzKCcuLi9kYXRhL3N1YnNjcmlwdGlvbnMucmRzJykKYGBgCgpgYGB7cn0Kc3VtbWFyeShzdWJzY3JpcHRpb25zKQpgYGAKCmBgYHtyfQpzdWJzY3JpcHRpb25zX3dpdGhfdGFyZ2V0IDwtIHN1YnNjcmlwdGlvbnMgJT4lCiAgIyByZXN0cmljdCB0byBhIHJlY2VudCBleHBpcnkgd2luZG93CiAgZmlsdGVyKGVuZG1vbnRoID49IGJlZ2luX3RyYWluX3dpbmRvdyAmIGVuZG1vbnRoIDwgZW5kX3dpbmRvdykgJT4lCiAgbXV0YXRlKG51bV9wcmV2aW91c19tb250aHNfYmlubmVkX2ZjdCA9IGFzLmZhY3RvcihudW1fcHJldmlvdXNfbW9udGhzX2Jpbm5lZCkpICU+JQogIG11dGF0ZShzZXRfdHlwZSA9IGFzLmZhY3RvcihpZl9lbHNlKGVuZG1vbnRoID49IGJlZ2luX3ZhbGlkYXRpb25fd2luZG93LCAndmFsaWRhdGlvbicsICd0cmFpbmluZycpKSkgJT4lCiAgCiAgbXV0YXRlKGNodXJuaW5kID0gaWZlbHNlKHN0YXR1cyA9PSAnY2h1cm4nLCAxLCAwKSkgCmBgYAoKClByZXBhcmUgY2h1cm50YWJsZSB0aGF0IHdlIHdhbnQgdG8gcHJlZGljdC4KCmBgYHtyfQpjaHVybnRhYmxlIDwtIHN1YnNjcmlwdGlvbnNfd2l0aF90YXJnZXQgJT4lCiAgCiAgZ3JvdXBfYnkoc2V0X3R5cGUsIHNpdGV2ZXJrZXlfY2F0MiwgbWFya2V0X2NhdGVnb3J5LCBtb250aHMsIG51bV9wcmV2aW91c19tb250aHNfYmlubmVkLCBjaG9zZW5fc3Vic19sZW5ndGgsIHN1YnNjcmlwdGlvbl9zdW1tYXJ5X25vX21hcmtldCkgJT4lCiAgc3VtbWFyaXNlKG51bV9vYnMgPSBuKCksIAogICAgICAgICAgICBjaHVybmVkID0gc3VtKGNodXJuaW5kKSkgJT4lCiAgCiAgZ3JvdXBfYnkoc2V0X3R5cGUpICU+JQogIG11dGF0ZShjaHVybl9yYXRlID0gY2h1cm5lZCAvIG51bV9vYnMsCiAgICAgICAgIHJlbmV3X3JhdGUgPSAxIC0gY2h1cm5fcmF0ZSwKICAgICAgICAgbW9udGhfY2h1cm4gPSAxIC0gcmVuZXdfcmF0ZSBeICgxL2FzLmRvdWJsZShtb250aHMpKSwKICAgICAgICAgbG9nX21vbnRoX2NodXJuID0gbG9nKG1vbnRoX2NodXJuKSwKICAgICAgICAgd2VpZ2h0ID0gbnVtX29icyAvIHN1bShudW1fb2JzKSkKCiMgTkIhIERvZXMgdGhpcyBpbnRyb2R1Y2UgYSBiYWQgYmlhcyA/Pz8/CmNodXJudGFibGVfbm9femVyb3MgPC0gY2h1cm50YWJsZSAlPiUKICBmaWx0ZXIoY2h1cm5fcmF0ZSA+IDApCmBgYAoKVHJhaW4gbW9kZWwKCmBgYHty
fQpuZXdfbW9kZWw9Z2xtKGxvZ19tb250aF9jaHVybiB+IG1hcmtldF9jYXRlZ29yeSArIHN1YnNjcmlwdGlvbl9zdW1tYXJ5X25vX21hcmtldCwgZGF0YT1jaHVybnRhYmxlX25vX3plcm9zW2NodXJudGFibGVfbm9femVyb3Mkc2V0X3R5cGUgPT0gJ3RyYWluaW5nJywgXSwgd2VpZ2h0cyA9IHdlaWdodCkKCndyaXRlX3JkcyhuZXdfbW9kZWwsICcuLi9kYXRhL21vZGVscy9jaHVybl9tb2RlbC5yZHMnKQpgYGAKCk1vZGVsIHZhbGlkYXRpb24gZm9yIHRyYWluaW5nICgyMDE3LTAxLTAxIC0gMjAxNy0wOC0wMSkgYW5kIHZhbGlkYXRpb24gKDIwMTctMDktMDEgLSAyMDE4LTAxLTAxKSBzZXRzOgoKKiBTdW1tYXJ5IHRhYmxlIGNvbnRhaW5pbmcKICAgICsgTnVtYmVyIG9mIG9ic2VydmF0aW9ucyB3aXRob3V0IHByZWRpY3Rpb24KICAgICsgQVVDLCBsb2dsb3NzIC0gcHJlZGljdGlvbiBxdWFsaXR5IG1ldHJpY3MKKiBST0MgY3VydmUKKiBQbG90cyBwZXIgbWFya2V0CiAgICArIEFnZSBvZiBjdXN0b21lcnMgdnMgcmVhbCBhbmQgcHJlZGljdGVkIHByb2JhYmlsaXR5IG9mIGNodXJuIGZvciBkaWZmZXJlbnQgc3Vic2NyaXB0aW9uIGxlbmd0aHMuIEl0IHNob3dzIGlmIHdlIGFyZSBjb3JyZWN0bHkgcHJlZGljdGluZyBwcm9iYWJpbGl0eSBvZiBjaHVybiBmb3IgY3VzdG9tZXJzIG92ZXIgbGlmZXRpbWUuCiAgICArIENhbGlicmF0aW9uIC0gUHJlZGljdGVkIHByb2JhYmlsaXR5IG9mIGNodXJuIHZzIHJlYWwgcHJvYmFiaWxpdHkgb2YgY2h1cm4gZm9yIGRpZmZlcmVudCBzdWJzY3JpcHRpb24gbGVuZ3RocyAod2VsbCBjYWxpYnJhdGVkIHByZWRpY3Rpb24gc2hvdWxkIGZvcm0gYSBkaWFnb25hbCBsaW5lKS4gU2hvd3MgaWYgb3V0Y29tZSBvZiBtb2RlbCBpbiBxdWVzdGlvbiBjYW4gYmUgcmVhbHkgdHJlYXRlZCBhcyBwcm9iYWJpbGl0eS4KCmBgYHtyIHdhcm5pbmc9Rn0KcHJlZGljdGlvbl90YWJsZSA8LSB2YWxpZGF0aW9uKHN1YnNjcmlwdGlvbnNfd2l0aF90YXJnZXQsIG5ld19tb2RlbCwgcHJlZGljdF8yZmN0X21vZGVsKQpgYGAKCmBgYHtyIGZpZy5oZWlnaHQ9MTAsIGZpZy53aWR0aD03LCB3YXJuaW5nPUZ9CnZhbGlkYXRpb25fcGxvdHMocHJlZGljdGlvbl90YWJsZSwgbWluaW1hbF9zaGFyZSA9IDAuMDEpCmBgYAoKVHJ5IHNpbXBsZSBsb2dpc3RpYyBtb2RlbApgYGB7cn0KbW9kZWxfbG9naXQgPC0gZ2xtKGNodXJuaW5kIH4gbWFya2V0X2NhdGVnb3J5ICsgc2l0ZXZlcmtleV9jYXQyICsgbnVtX3ByZXZpb3VzX21vbnRoc19iaW5uZWQgKyBtb250aHMgKyBjaG9zZW5fc3Vic19sZW5ndGgsCiAgICAgICAgICAgICAgICAgICBkYXRhID0gc3Vic2NyaXB0aW9uc193aXRoX3RhcmdldFtzdWJzY3JpcHRpb25zX3dpdGhfdGFyZ2V0JHNldF90eXBlID09ICd0cmFpbmluZycsXSwgZmFtaWx5ID0gJ2Jpbm9taWFsJykKYGBgCgoKTW9kZWwgdmFsaWRhdGlvbiBmb3IgdHJhaW5pbmcgKDIwMTctMDEtMDEgLSAyMDE3LTA4LTAxKSBhbmQg
dmFsaWRhdGlvbiAoMjAxNy0wOS0wMSAtIDIwMTgtMDEtMDEpIHNldHM6CgoqIFN1bW1hcnkgdGFibGUgY29udGFpbmluZwogICAgKyBOdW1iZXIgb2Ygb2JzZXJ2YXRpb25zIHdpdGhvdXQgcHJlZGljdGlvbgogICAgKyBBVUMsIGxvZ2xvc3MgLSBwcmVkaWN0aW9uIHF1YWxpdHkgbWV0cmljcwoqIFJPQyBjdXJ2ZQoqIFBsb3RzIHBlciBtYXJrZXQKICAgICsgQWdlIG9mIGN1c3RvbWVycyB2cyByZWFsIGFuZCBwcmVkaWN0ZWQgcHJvYmFiaWxpdHkgb2YgY2h1cm4gZm9yIGRpZmZlcmVudCBzdWJzY3JpcHRpb24gbGVuZ3Rocy4gSXQgc2hvd3MgaWYgd2UgYXJlIGNvcnJlY3RseSBwcmVkaWN0aW5nIHByb2JhYmlsaXR5IG9mIGNodXJuIGZvciBjdXN0b21lcnMgb3ZlciBsaWZldGltZS4KICAgICsgQ2FsaWJyYXRpb24gLSBQcmVkaWN0ZWQgcHJvYmFiaWxpdHkgb2YgY2h1cm4gdnMgcmVhbCBwcm9iYWJpbGl0eSBvZiBjaHVybiBmb3IgZGlmZmVyZW50IHN1YnNjcmlwdGlvbiBsZW5ndGhzICh3ZWxsIGNhbGlicmF0ZWQgcHJlZGljdGlvbiBzaG91bGQgZm9ybSBhIGRpYWdvbmFsIGxpbmUpLiBTaG93cyBpZiBvdXRjb21lIG9mIG1vZGVsIGluIHF1ZXN0aW9uIGNhbiBiZSByZWFseSB0cmVhdGVkIGFzIHByb2JhYmlsaXR5LgpgYGB7ciB3YXJuaW5nPUZ9CnByZWRpY3Rpb25fdGFibGVfbG9naXQgPC0gdmFsaWRhdGlvbihzdWJzY3JpcHRpb25zX3dpdGhfdGFyZ2V0LCBtb2RlbF9sb2dpdCkKYGBgCgpgYGB7ciBmaWcuaGVpZ2h0PTEwLCBmaWcud2lkdGg9Nywgd2FybmluZz1GfQp2YWxpZGF0aW9uX3Bsb3RzKHByZWRpY3Rpb25fdGFibGVfbG9naXQsIG1pbmltYWxfc2hhcmUgPSAwLjAxKQpgYGA=